package com.jusyl.cristo.spider.smzdm;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that scans a fetched page for
 * {@code <a class="ask-author">} elements and prints the inner HTML of those
 * containing the keyword {@code zijunwenlong}, followed by the page URL.
 *
 * <p>All output goes to standard output; nothing is stored or added to the
 * crawl queue by this processor.
 */
public class SmzdmProcessorBak implements PageProcessor {

	// Not referenced anywhere in this class.
	private static List<String> urlList = new ArrayList<String>();

	// Crawl settings handed to the spider: retry times 3, sleep time 1000.
	private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

	/** Creates a processor with the default site settings. */
	public SmzdmProcessorBak() {
	}

	/**
	 * Returns the site configuration held by this processor.
	 *
	 * @return the {@link Site} instance created when this processor was constructed
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Prints diagnostic information about the page for later analysis.
	 *
	 * @param page the downloaded page supplied by the spider
	 */
	@Override
	public void process(Page page) {
		// Dump page information so it can be inspected when analysing the site.
		System.out.println("开始进入Page");

		List<String> authorFragments = page.getHtml()
				.xpath("//a[@class='ask-author']/html()")
				.all();

		for (String fragment : authorFragments) {
			// Only fragments that mention the keyword are of interest.
			if (!fragment.contains("zijunwenlong")) {
				continue;
			}
			System.out.println(fragment);
		}

		System.out.println(page.getUrl());
	}

	/**
	 * Starts a single-threaded spider on one fixed question URL.
	 *
	 * @param arg command-line arguments (not used)
	 */
	public static void main(String[] arg) {
		Spider crawler = Spider.create(new SmzdmProcessorBak())
				.addUrl("https://wenda.so.com/q/1537423811216898")
				.thread(1);
		crawler.run();
	}
}
